#!/usr/bin/env python
# coding=utf-8
# __author__ = 'Yunchao Ling'

import re
import sys

from pymongo import MongoClient


def addJCR():
    """Merge JCR metadata into an existing journal document matched by ISSN.

    Reads the tab-separated file named by ``sys.argv[1]``. Columns used
    (0-based): 0 FullTitle, 1 ISOAbbr, 2 JCRAbbr, 3 ISSN, 7 Publisher,
    8 PublisherAddress, 9 Category (``|``-separated). Columns 4-6 are ignored.

    For every non-blank row, one document of ``journal.journal`` whose
    ``pISSN`` or ``eISSN`` equals the row's ISSN receives the non-empty
    values through ``$addToSet``.

    Rows are skipped when the ISSN is empty or the ``****-****`` placeholder
    (there is nothing meaningful to match on) or when no value is left to add.
    """
    SERVER = 'localhost'
    PORT = 27017
    DB_NAME = "journal"
    COLLECTION_NAME = "journal"
    MIN_COLUMNS = 10

    # (target field, source column) for the single-valued columns.
    simple_fields = (
        ("FullTitle", 0),
        ("ISOAbbr", 1),
        ("JCRAbbr", 2),
        ("Publisher", 7),
        ("PublisherAddress", 8),
    )

    connection = MongoClient(SERVER, PORT)
    try:
        collection = connection[DB_NAME][COLLECTION_NAME]
        with open(sys.argv[1], "r") as infile:
            for line in infile:
                # Strip only the line terminator: a plain rstrip() would also
                # eat trailing tabs and drop empty trailing columns.
                line = line.rstrip("\r\n")
                if not line.strip():
                    continue

                columns = [column.strip() for column in line.split("\t")]
                # Pad short rows so every expected index exists.
                columns.extend([""] * (MIN_COLUMNS - len(columns)))

                issn = columns[3]
                if issn == "" or issn == "****-****":
                    continue

                addtoset = {}
                for field, index in simple_fields:
                    if columns[index] != "":
                        addtoset[field] = columns[index]
                if columns[9] != "":
                    # $each with a one-element list adds that single element,
                    # so one form covers both the single and multi case.
                    addtoset["Category"] = {"$each": columns[9].split("|")}

                # Never send an empty $addToSet document to the server.
                if not addtoset:
                    continue

                collection.update_one(
                    {"$or": [{"pISSN": issn}, {"eISSN": issn}]},
                    {"$addToSet": addtoset})
    finally:
        connection.close()


def addTitleJCR():
    """Merge JCR metadata into an existing journal document matched by title.

    Reads the tab-separated file named by ``sys.argv[1]``. Columns used
    (0-based): 0 FullTitle, 1 ISOAbbr, 2 JCRAbbr, 3 ISSN, 7 Publisher,
    8 PublisherAddress, 9 Category (``|``-separated). Columns 4-6 are ignored.

    For every non-blank row, one document of ``journal.journal`` is selected
    by a case-insensitive exact match of the row's full title against
    ``FullTitle``, or of its ISO abbreviation against ``ISOAbbr`` or
    ``MedAbbr``. The non-empty values are added to it through ``$addToSet``;
    the ISSN goes to ``ISSNSet`` unless it is empty or ``****-****``.

    Rows with neither a full title nor an ISO abbreviation are skipped.
    """
    SERVER = 'localhost'
    PORT = 27017
    DB_NAME = "journal"
    COLLECTION_NAME = "journal"
    MIN_COLUMNS = 10

    # (target field, source column) for the single-valued columns.
    simple_fields = (
        ("FullTitle", 0),
        ("ISOAbbr", 1),
        ("JCRAbbr", 2),
        ("Publisher", 7),
        ("PublisherAddress", 8),
    )

    def exact_ignore_case(text):
        # Escape the text so characters such as "(", "+" or "." are matched
        # literally instead of being interpreted as regex syntax.
        return {"$regex": "^" + re.escape(text) + "$", "$options": "i"}

    connection = MongoClient(SERVER, PORT)
    try:
        collection = connection[DB_NAME][COLLECTION_NAME]
        with open(sys.argv[1], "r") as infile:
            for line in infile:
                # Strip only the line terminator: a plain rstrip() would also
                # eat trailing tabs and drop empty trailing columns.
                line = line.rstrip("\r\n")
                if not line.strip():
                    continue

                columns = [column.strip() for column in line.split("\t")]
                # Pad short rows so every expected index exists.
                columns.extend([""] * (MIN_COLUMNS - len(columns)))

                full_title = columns[0]
                iso_abbr = columns[1]
                issn = columns[3]

                # Build match clauses only from non-empty values; an empty
                # value would produce the pattern "^$" and match documents
                # that merely hold an empty string.
                clauses = []
                if full_title != "":
                    clauses.append({"FullTitle": exact_ignore_case(full_title)})
                if iso_abbr != "":
                    clauses.append({"ISOAbbr": exact_ignore_case(iso_abbr)})
                    clauses.append({"MedAbbr": exact_ignore_case(iso_abbr)})
                if not clauses:
                    continue

                addtoset = {}
                for field, index in simple_fields:
                    if columns[index] != "":
                        addtoset[field] = columns[index]
                if issn != "" and issn != "****-****":
                    addtoset["ISSNSet"] = issn
                if columns[9] != "":
                    # $each with a one-element list adds that single element,
                    # so one form covers both the single and multi case.
                    addtoset["Category"] = {"$each": columns[9].split("|")}

                collection.update_one({"$or": clauses},
                                      {"$addToSet": addtoset})
    finally:
        connection.close()


def addJCRmiss():
    """Insert JCR rows as new journal documents.

    Reads the tab-separated file named by ``sys.argv[2]``. Columns used
    (0-based): 0 FullTitle, 1 ISOAbbr, 2 JCRAbbr, 3 ISSN, 7 Publisher,
    8 PublisherAddress, 9 Category (``|``-separated). Columns 4-6 are ignored.

    Every non-blank row becomes one document in ``journal.journal``. Each
    non-empty value is stored as a one-element list; ``Category`` is stored
    as the list of its ``|``-separated parts. The ISSN is stored under
    ``ISSNSet`` unless it is empty or ``****-****``.

    Rows that yield no field at all are skipped rather than inserted as
    empty documents.
    """
    SERVER = 'localhost'
    PORT = 27017
    DB_NAME = "journal"
    COLLECTION_NAME = "journal"
    MIN_COLUMNS = 10

    connection = MongoClient(SERVER, PORT)
    try:
        collection = connection[DB_NAME][COLLECTION_NAME]
        with open(sys.argv[2], "r") as infile:
            for line in infile:
                # Strip only the line terminator: a plain rstrip() would also
                # eat trailing tabs and drop empty trailing columns.
                line = line.rstrip("\r\n")
                if not line.strip():
                    continue

                columns = [column.strip() for column in line.split("\t")]
                # Pad short rows so every expected index exists.
                columns.extend([""] * (MIN_COLUMNS - len(columns)))

                issn = columns[3]

                document = {}
                if columns[0] != "":
                    document["FullTitle"] = [columns[0]]
                if columns[1] != "":
                    document["ISOAbbr"] = [columns[1]]
                if columns[2] != "":
                    document["JCRAbbr"] = [columns[2]]
                if issn != "" and issn != "****-****":
                    document["ISSNSet"] = [issn]
                if columns[7] != "":
                    document["Publisher"] = [columns[7]]
                if columns[8] != "":
                    document["PublisherAddress"] = [columns[8]]
                if columns[9] != "":
                    document["Category"] = columns[9].split("|")

                if not document:
                    continue

                collection.insert_one(document)
    finally:
        connection.close()


if __name__ == "__main__":
    # addTitleJCR() reads its input path from sys.argv[1] and addJCRmiss()
    # from sys.argv[2]. Check both up front so a missing argument does not
    # surface as an IndexError after the first pass has already modified the
    # database.
    if len(sys.argv) < 3:
        sys.exit("usage: %s <jcr_tsv> <jcr_missing_tsv>" % sys.argv[0])
    # addJCR() (ISSN-based matching) is not invoked by this entry point.
    addTitleJCR()
    addJCRmiss()
